##Introduction

This Rmarkdown file has a series of simulations run on the models offered in idealstan. It simulates a large amount of data and then checks to see if the residuals relative to the true parameters sum to zero, as well as for credible interval coverage. In general, the credible intervals aren’t going to be exactly where they should be because the constraints used in estimating the models and identifying the signs aren’t themselves included in the simulated data. I.e., one of the ideal points actually follows a positive continuous distribution instead of a random normal distribution once it is prefix, which introduces a slight bias in the recovery of the true parameters. This is an artifact of virtually every latent variable model, and is not of great concern as long as the credible intervals reach close to 90 percent coverage.

First, the standard IRT 2-PL (for this one, the absence discriminations are not a part of the model):

bin_irt_2pl_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F)
bin_irt_2pl_est <- id_estimate(idealdata=bin_irt_2pl_sim,
                               model_type=1,init_pathfinder = TRUE,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                               restrict_ind_low = as.character(sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                    decreasing=F, 
                                                        index=T)$ix[1]),
                               fix_high = sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(bin_irt_2pl_sim@simul_data$true_person,
                                                                     decreasing=F)[1],

                               fixtype='prefix',
                           person_sd = 3,
                           nchains=2,ncores=4,niters = 500,warmup=500)
## [1] "Running pathfinder to find starting values"
## Finished in  0.6 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
## 
## Chain 2 finished in 14.9 seconds.
## Chain 1 finished in 15.0 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 15.0 seconds.
## Total execution time: 15.2 seconds.
id_plot_rhats(bin_irt_2pl_est)

id_sim_coverage(bin_irt_2pl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_est,show_true = T)

id_plot_sims(bin_irt_2pl_est,type='Residuals')

id_plot_sims(bin_irt_2pl_est)

new_data <- bin_irt_2pl_est@score_data@func_args$score_data

c1 <- id_post_pred(bin_irt_2pl_est, newdata=new_data)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 1000 samples."
## [1] "Now on model 1"
  id_plot_ppc(bin_irt_2pl_est,ppc_pred=c1,combine_item=FALSE,
              prompt_plot=FALSE)

## [[1]]
## [[1]][[1]]
## NULL
## 
## [[1]][[2]]
## NULL
## 
## [[1]][[3]]
## NULL
## 
## [[1]][[4]]
## NULL
## 
## [[1]][[5]]
## NULL
## 
## [[1]][[6]]
## NULL
## 
## [[1]][[7]]
## NULL
## 
## [[1]][[8]]
## NULL
## 
## [[1]][[9]]
## NULL
## 
## [[1]][[10]]
## NULL
## 
## [[1]][[11]]
## NULL
## 
## [[1]][[12]]
## NULL
## 
## [[1]][[13]]
## NULL
## 
## [[1]][[14]]
## NULL
## 
## [[1]][[15]]
## NULL
## 
## [[1]][[16]]
## NULL
## 
## [[1]][[17]]
## NULL
## 
## [[1]][[18]]
## NULL
## 
## [[1]][[19]]
## NULL
## 
## [[1]][[20]]
## NULL

IRT 2-PL Binary + Covariate

Try this same model but add an external covariate with an ideal point effect of +2

bin_irt_2pl_cov_sim <- id_sim_gen(num_person=100,num_items=20,ordinal=F,inflate=F,
                              cov_effect = 3)
bin_irt_2pl_cov_est <- id_estimate(idealdata=bin_irt_2pl_cov_sim,
                               model_type=1,init_pathfinder = TRUE,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                               restrict_ind_low = as.character(sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                    decreasing=F, 
                                                        index=T)$ix[1]),
                               fix_high = sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(bin_irt_2pl_cov_sim@simul_data$true_person,
                                                                     decreasing=F)[1],

                               fixtype='prefix',
                           person_sd = 3,
                           nchains=2,ncores=4,niters = 500,warmup=500)
## [1] "Running pathfinder to find starting values"
## Finished in  0.7 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
## 
## Chain 1 finished in 16.3 seconds.
## Chain 2 finished in 17.1 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 16.7 seconds.
## Total execution time: 17.1 seconds.
id_plot_rhats(bin_irt_2pl_cov_est)

id_sim_coverage(bin_irt_2pl_cov_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_cov_est,show_true = T)
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"

id_plot_sims(bin_irt_2pl_cov_est,type='Residuals')

id_plot_sims(bin_irt_2pl_cov_est)

new_data <- bin_irt_2pl_cov_est@score_data@func_args$score_data

c1 <- id_post_pred(bin_irt_2pl_cov_est, newdata=new_data)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 1000 samples."
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"
## [1] "Now on model 1"
  id_plot_ppc(bin_irt_2pl_cov_est,ppc_pred=c1,combine_item=FALSE,
              prompt_plot=FALSE)

## [[1]]
## [[1]][[1]]
## NULL
## 
## [[1]][[2]]
## NULL
## 
## [[1]][[3]]
## NULL
## 
## [[1]][[4]]
## NULL
## 
## [[1]][[5]]
## NULL
## 
## [[1]][[6]]
## NULL
## 
## [[1]][[7]]
## NULL
## 
## [[1]][[8]]
## NULL
## 
## [[1]][[9]]
## NULL
## 
## [[1]][[10]]
## NULL
## 
## [[1]][[11]]
## NULL
## 
## [[1]][[12]]
## NULL
## 
## [[1]][[13]]
## NULL
## 
## [[1]][[14]]
## NULL
## 
## [[1]][[15]]
## NULL
## 
## [[1]][[16]]
## NULL
## 
## [[1]][[17]]
## NULL
## 
## [[1]][[18]]
## NULL
## 
## [[1]][[19]]
## NULL
## 
## [[1]][[20]]
## NULL

Now check for covariate effects of ideal point covariates

First, plot estimated coefficient legis_x against true value:

mcmc_recover_hist(bin_irt_2pl_cov_est@stan_samples$draws("legis_x"),
                  true=bin_irt_2pl_cov_sim@simul_data$cov_effect)

eps <- 1e-4
  
  new_data1 <- mutate(bin_irt_2pl_cov_sim@score_matrix,
                      person_x = person_x - eps / 2)
  
  new_data2 <- mutate(bin_irt_2pl_cov_sim@score_matrix,
                      person_x = person_x + eps / 2)
  
  # now predict new outcome distributions given these slightly different datasets
  
  cov_mod_pred1 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data1,
                                 use_cores=4,draws="all",
                                item_subset=levels(new_data1$item),
                                 type="epred")
## [1] "Processing posterior replications for 2000 scores using all posterior samples out of a total of 1000 samples."
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"
## [1] "Now on model 1"
  cov_mod_pred2 <- id_post_pred(bin_irt_2pl_cov_est,newdata=new_data2,
                                 use_cores=4,draws="all",
                                item_subset=levels(new_data2$item),
                                 type="epred")
## [1] "Processing posterior replications for 2000 scores using all posterior samples out of a total of 1000 samples."
## [1] "Adding in hierarchical covariates values to the time-varying person scores."
## [1] "Collapsing covariates to person and time IDs."
## [1] "Done!"
## [1] "Now on model 1"
  # now we need to difference the datasets at the item level to calculate the effects
  # this will create some lists that will then be appended together to a big data frame
  # with one observation per posterior draw
  
    print("Looping over items")
## [1] "Looping over items"
    # difference the predictions
  
  c1 <- purrr::map2(cov_mod_pred1[[1]],
                    cov_mod_pred2[[1]],
                    function(small,big) {
                      
                      # difference the effects
                      
                      (big - small) / eps
                      
                    })
  
  # combine into datasets for each item with item id + person (senator) id
  
  c2 <- lapply(c1, function(mat) {
    
    out_data <- attr(mat, "data")
    colnames(mat) <- out_data$person_id
    
    as_tibble(mat) %>% 
      mutate(draws=1:n(),
             item_id=unique(out_data$item_id)) %>% 
      gather(key="person_id",value="estimate",-draws,-item_id) %>% 
      mutate(person_id=as.numeric(person_id),
             estimate=as.numeric(estimate))
    
  }) %>% bind_rows
  
  # merge in some original data
  to_merge <- mutate(bin_irt_2pl_cov_est@score_data@score_matrix, 
                     item_orig=item_id,
                     person_orig=person_id,
                     person_id=as.numeric(person_id),
                     item_id=as.numeric(item_id)) %>% 
    select(person_id, item_id, group_id,item_orig, person_orig) %>% 
    distinct
  
  # add in party data so we can calculate party-specific effects
  
  c2 <- left_join(c2, to_merge, 
                  by=c("item_id","person_id"))
  
  # get effect separately by democrats/republicans
  
   by_party <- group_by(c2, draws, group_id, item_id, item_orig) %>% 
    summarize(mean_est1=mean(estimate)) %>% 
    group_by(group_id, item_id, item_orig) %>% 
    summarize(mean_est=mean(mean_est1),
              low_est=quantile(mean_est1, .05),
              high_est=quantile(mean_est1, .95))
   
   # merge in item discrimination to add some color / show high-discrim votes
   
   item_discrim <- filter(bin_irt_2pl_cov_est@summary,
                         grepl(x=variable, pattern="sigma\\_reg\\_free")) %>% 
    mutate(item_id=as.numeric(stringr::str_extract(variable, "[0-9]+")))
   
   by_party <- left_join(by_party,
                        select(item_discrim, median, item_id))
   
   # plot the result
   
   by_party %>% 
    ggplot(aes(y=mean_est,
               x=reorder(item_id,mean_est))) +
    geom_linerange(aes(ymin=low_est,
                       ymax=high_est,
                       colour=`median`)) +
    ggthemes::theme_tufte() + 
    scale_colour_viridis_c(name="Discrimination") +
    coord_flip() +
    geom_hline(yintercept=0,linetype=2) +
    ggthemes::theme_clean() +
    theme(axis.text.y=element_blank(),
          axis.ticks.y=element_blank()) +
  theme(legend.position = "bottom")

Inflated 2-PL IRT (binary):

Next, inflated 2-PL IRT (binary):

bin_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T,
                               absence_diff_mean=0)
bin_irt_2pl_abs_est <- id_estimate(idealdata=bin_irt_2pl_abs_sim,
                               model_type=2,
                               restrict_ind_high = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high=sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],
                              fix_low=sort(bin_irt_2pl_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  0.9 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## 
## Chain 3 finished in 53.0 seconds.
## Chain 4 finished in 53.2 seconds.
## Chain 2 finished in 53.6 seconds.
## Chain 1 finished in 59.4 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 54.8 seconds.
## Total execution time: 59.5 seconds.
id_plot_rhats(bin_irt_2pl_abs_est)

Plot true versus observed values:

id_plot_legis(bin_irt_2pl_abs_est,show_true = T)

id_sim_coverage(bin_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(bin_irt_2pl_abs_est,type='Residuals')

id_plot_sims(bin_irt_2pl_abs_est)

bin_irt_2pl_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(bin_irt_2pl_abs_est,ppc_pred=.,combine_item=FALSE,prompt_plot=FALSE)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 2"

## [[1]]
## [[1]][[1]]
## NULL
## 
## [[1]][[2]]
## NULL
## 
## [[1]][[3]]
## NULL
## 
## [[1]][[4]]
## NULL
## 
## [[1]][[5]]
## NULL
## 
## [[1]][[6]]
## NULL
## 
## [[1]][[7]]
## NULL
## 
## [[1]][[8]]
## NULL
## 
## [[1]][[9]]
## NULL
## 
## [[1]][[10]]
## NULL
## 
## [[1]][[11]]
## NULL
## 
## [[1]][[12]]
## NULL
## 
## [[1]][[13]]
## NULL
## 
## [[1]][[14]]
## NULL
## 
## [[1]][[15]]
## NULL
## 
## [[1]][[16]]
## NULL
## 
## [[1]][[17]]
## NULL
## 
## [[1]][[18]]
## NULL
## 
## [[1]][[19]]
## NULL
## 
## [[1]][[20]]
## NULL
## 
## [[1]][[21]]
## NULL
## 
## [[1]][[22]]
## NULL
## 
## [[1]][[23]]
## NULL
## 
## [[1]][[24]]
## NULL
## 
## [[1]][[25]]
## NULL
## 
## [[1]][[26]]
## NULL
## 
## [[1]][[27]]
## NULL
## 
## [[1]][[28]]
## NULL
## 
## [[1]][[29]]
## NULL
## 
## [[1]][[30]]
## NULL
## 
## [[1]][[31]]
## NULL
## 
## [[1]][[32]]
## NULL
## 
## [[1]][[33]]
## NULL
## 
## [[1]][[34]]
## NULL
## 
## [[1]][[35]]
## NULL
## 
## [[1]][[36]]
## NULL
## 
## [[1]][[37]]
## NULL
## 
## [[1]][[38]]
## NULL
## 
## [[1]][[39]]
## NULL
## 
## [[1]][[40]]
## NULL
## 
## [[1]][[41]]
## NULL
## 
## [[1]][[42]]
## NULL
## 
## [[1]][[43]]
## NULL
## 
## [[1]][[44]]
## NULL
## 
## [[1]][[45]]
## NULL
## 
## [[1]][[46]]
## NULL
## 
## [[1]][[47]]
## NULL
## 
## [[1]][[48]]
## NULL
## 
## [[1]][[49]]
## NULL
## 
## [[1]][[50]]
## NULL
## 
## [[1]][[51]]
## NULL
## 
## [[1]][[52]]
## NULL
## 
## [[1]][[53]]
## NULL
## 
## [[1]][[54]]
## NULL
## 
## [[1]][[55]]
## NULL
## 
## [[1]][[56]]
## NULL
## 
## [[1]][[57]]
## NULL
## 
## [[1]][[58]]
## NULL
## 
## [[1]][[59]]
## NULL
## 
## [[1]][[60]]
## NULL
## 
## [[1]][[61]]
## NULL
## 
## [[1]][[62]]
## NULL
## 
## [[1]][[63]]
## NULL
## 
## [[1]][[64]]
## NULL
## 
## [[1]][[65]]
## NULL
## 
## [[1]][[66]]
## NULL
## 
## [[1]][[67]]
## NULL
## 
## [[1]][[68]]
## NULL
## 
## [[1]][[69]]
## NULL
## 
## [[1]][[70]]
## NULL
## 
## [[1]][[71]]
## NULL
## 
## [[1]][[72]]
## NULL
## 
## [[1]][[73]]
## NULL
## 
## [[1]][[74]]
## NULL
## 
## [[1]][[75]]
## NULL
## 
## [[1]][[76]]
## NULL
## 
## [[1]][[77]]
## NULL
## 
## [[1]][[78]]
## NULL
## 
## [[1]][[79]]
## NULL
## 
## [[1]][[80]]
## NULL
## 
## [[1]][[81]]
## NULL
## 
## [[1]][[82]]
## NULL
## 
## [[1]][[83]]
## NULL
## 
## [[1]][[84]]
## NULL
## 
## [[1]][[85]]
## NULL
## 
## [[1]][[86]]
## NULL
## 
## [[1]][[87]]
## NULL
## 
## [[1]][[88]]
## NULL
## 
## [[1]][[89]]
## NULL
## 
## [[1]][[90]]
## NULL
## 
## [[1]][[91]]
## NULL
## 
## [[1]][[92]]
## NULL
## 
## [[1]][[93]]
## NULL
## 
## [[1]][[94]]
## NULL
## 
## [[1]][[95]]
## NULL
## 
## [[1]][[96]]
## NULL
## 
## [[1]][[97]]
## NULL
## 
## [[1]][[98]]
## NULL
## 
## [[1]][[99]]
## NULL
## 
## [[1]][[100]]
## NULL

Now we’ll start with the ordinal models. First the uninflated ordinal model:

ord_irt_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
                               absence_diff_mean=0,model_type='ordinal_ratingscale')
ord_irt_est <- id_estimate(idealdata=ord_irt_sim,
                               model_type=3,
                               restrict_ind_high = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_sim@simul_data$true_person,decreasing=F,
                                                        index=T)$ix[1]),
                           fix_high=sort(ord_irt_sim@simul_data$true_person,decreasing=T)[1],
                           fix_low=sort(ord_irt_sim@simul_data$true_person,decreasing=F)[1],
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  1.2 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 1 finished in 135.5 seconds.
## Chain 4 finished in 161.1 seconds.
## Chain 2 finished in 164.9 seconds.
## Chain 3 finished in 167.0 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 157.1 seconds.
## Total execution time: 167.2 seconds.
id_plot_rhats(ord_irt_est)

Plot true versus observed values:

id_plot_legis(ord_irt_est,show_true = T)

id_sim_coverage(ord_irt_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_est,type='Residuals')

id_plot_sims(ord_irt_est)

ord_irt_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 3"
## [1] "Predicting outcomes for model ID 3."

And we will finish with the inflated ordinal model:

ord_irt_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='ordinal_ratingscale')
ord_irt_abs_est <- id_estimate(idealdata=ord_irt_abs_sim,
                               model_type=4,
                               restrict_ind_high = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  1.5 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 4 finished in 86.1 seconds.
## Chain 2 finished in 89.4 seconds.
## Chain 3 finished in 91.0 seconds.
## Chain 1 finished in 91.2 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 89.4 seconds.
## Total execution time: 91.3 seconds.
id_plot_rhats(ord_irt_abs_est)

id_plot_legis(ord_irt_abs_est,show_true = T)

id_sim_coverage(ord_irt_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_abs_est,type='Residuals')

id_plot_sims(ord_irt_abs_est)

ord_irt_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 4"
## [1] "Predicting outcomes for model ID 4."

We can now try the ordinal graded response version (non-inflated):

ord_irt_grm_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
                               absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_est <- id_estimate(idealdata=ord_irt_grm_sim,
                               model_type=5,
                               restrict_ind_high = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_grm_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  1.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 3 finished in 292.4 seconds.
## Chain 2 finished in 294.7 seconds.
## Chain 1 finished in 295.3 seconds.
## Chain 4 finished in 296.0 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 294.6 seconds.
## Total execution time: 296.2 seconds.
id_plot_rhats(ord_irt_grm_est)

id_sim_coverage(ord_irt_grm_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_legis(ord_irt_grm_est,show_true = T)

id_plot_sims(ord_irt_grm_est,type='Residuals')

id_plot_sims(ord_irt_grm_est)

ord_irt_grm_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_grm_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 5"
## [1] "Predicting outcomes for model ID 5."

And now the inflated GRM:

ord_irt_grm_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='ordinal_grm')
ord_irt_grm_abs_est <- id_estimate(idealdata=ord_irt_grm_abs_sim,
                               model_type=6,
                               restrict_ind_high = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_grm_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  1.4 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 1 finished in 166.7 seconds.
## Chain 4 finished in 179.4 seconds.
## Chain 2 finished in 187.9 seconds.
## Chain 3 finished in 195.5 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 182.4 seconds.
## Total execution time: 195.7 seconds.
id_plot_rhats(ord_irt_grm_abs_est)

id_plot_legis(ord_irt_grm_abs_est,show_true = T)

id_sim_coverage(ord_irt_grm_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_grm_abs_est,type='Residuals')

id_plot_sims(ord_irt_grm_abs_est)

ord_irt_grm_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_grm_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 6"
## [1] "Predicting outcomes for model ID 6."

Additional Models V0.3

Models I am adding to this release include the Poisson model:

ord_irt_poisson_sim <- id_sim_gen(num_person=20,num_items=100,inflate=F,
                                  absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_est <- id_estimate(idealdata=ord_irt_poisson_sim,
                               model_type=7,restrict_ind_high = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high=sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                     decreasing=T)[1],
                              fix_low=sort(ord_irt_poisson_sim@simul_data$true_person,
                                                                   decreasing=F)[1],
                               fixtype='prefix',nchains=2,ncores=8)
## [1] "Running pathfinder to find starting values"
## Finished in  4.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 4 thread(s) per chain...
## 
## Chain 2 finished in 73.2 seconds.
## Chain 1 finished in 75.5 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 74.4 seconds.
## Total execution time: 75.7 seconds.
id_plot_rhats(ord_irt_poisson_est)

id_plot_legis(ord_irt_poisson_est,show_true = T)

id_sim_coverage(ord_irt_poisson_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_poisson_est,type='Residuals')

id_plot_sims(ord_irt_poisson_est)

ord_irt_poisson_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_poisson_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 7"
## [1] "Predicting outcomes for model ID 7."

Now Poisson-inflated:

ord_irt_poisson_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='poisson')
ord_irt_poisson_abs_est <- id_estimate(idealdata=ord_irt_poisson_abs_sim,
                               model_type=8,ncores = 8,
                               nchains=2,
                               restrict_ind_high = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_poisson_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  6.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 4 thread(s) per chain...
## 
## Chain 1 finished in 174.1 seconds.
## Chain 2 finished in 178.8 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 176.4 seconds.
## Total execution time: 178.9 seconds.
id_plot_rhats(ord_irt_poisson_abs_est)

id_plot_legis(ord_irt_poisson_abs_est,show_true = T)

id_sim_coverage(ord_irt_poisson_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_poisson_abs_est,type='Residuals')

id_plot_sims(ord_irt_poisson_abs_est)

ord_irt_poisson_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 8"
## [1] "Predicting outcomes for model ID 8."

ord_irt_poisson_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_poisson_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 8"
## [1] "Predicting outcomes for model ID 8."

A Normally-distributed outcome with no inflation:

ord_irt_normal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='normal')
ord_irt_normal_est <- id_estimate(idealdata=ord_irt_normal_sim,
                               model_type=9,restrict_sd_high = .1,
                               restrict_sd_low=.1,
                               restrict_ind_high = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_normal_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),fix_high = sort(ord_irt_normal_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_normal_sim@simul_data$true_person,
                                                                     decreasing=F)[1],
                               fixtype='prefix',compile_optim=F,
                              het_var=FALSE)
## [1] "Running pathfinder to find starting values"
## Finished in  0.6 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## Chain 3 finished in 53.3 seconds.
## Chain 2 finished in 54.2 seconds.
## Chain 1 finished in 55.3 seconds.
## Chain 4 finished in 55.5 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 54.6 seconds.
## Total execution time: 55.6 seconds.
id_plot_rhats(ord_irt_normal_est)

id_plot_legis(ord_irt_normal_est,show_true = T)

id_sim_coverage(ord_irt_normal_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_normal_est,type='Residuals')

id_plot_sims(ord_irt_normal_est)

ord_irt_normal_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_normal_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 4000 samples."
## [1] "Now on model 9"
## [1] "Predicting outcomes for model ID 9."

A Normally-distributed outcome with inflation:

ord_irt_normal_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,model_type='normal')
ord_irt_normal_abs_est <- id_estimate(idealdata=ord_irt_normal_abs_sim,
                               model_type=10,
                               restrict_ind_high = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_normal_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=4,
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  6.1 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
## Chain 1 finished in 157.1 seconds.
## Chain 2 finished in 247.3 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 202.2 seconds.
## Total execution time: 247.4 seconds.
id_plot_rhats(ord_irt_normal_abs_est)

id_plot_legis(ord_irt_normal_abs_est,show_true = T)

id_sim_coverage(ord_irt_normal_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_normal_abs_est,type='Residuals')

id_plot_sims(ord_irt_normal_abs_est)

ord_irt_normal_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 10"
## [1] "Predicting outcomes for model ID 10."

ord_irt_normal_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_normal_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 10"
## [1] "Predicting outcomes for model ID 10."

A Lognormal model with no inflation:

ord_irt_lognormal_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=F,
                               absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_est <- id_estimate(idealdata=ord_irt_lognormal_sim,
                               model_type=11,
                               restrict_ind_high = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                              fix_high = sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_lognormal_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=4,
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  0.7 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
## 
## Chain 2 finished in 24.1 seconds.
## Chain 1 finished in 25.1 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 24.6 seconds.
## Total execution time: 25.2 seconds.
id_plot_rhats(ord_irt_lognormal_est)

id_plot_legis(ord_irt_lognormal_est,show_true = T)

id_sim_coverage(ord_irt_lognormal_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_lognormal_est,type='Residuals')

id_plot_sims(ord_irt_lognormal_est)

ord_irt_lognormal_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_lognormal_est,ppc_pred=.) + 
  scale_x_log10()
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 11"
## [1] "Predicting outcomes for model ID 11."

And last but not least, a lognormal model with inflation:

ord_irt_lognormal_abs_sim <- id_sim_gen(num_person=20,num_items=100,ordinal=F,inflate=T,
                               absence_diff_mean=0,model_type='lognormal')
ord_irt_lognormal_abs_est <- id_estimate(idealdata=ord_irt_lognormal_abs_sim,
                               model_type=12,
                               restrict_ind_high = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                     decreasing=T,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),fix_high = sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                     decreasing=T)[1],fix_low = sort(ord_irt_lognormal_abs_sim@simul_data$true_person,
                                                                     decreasing=F)[1],nchains = 2,ncores=4,
                              
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  6.6 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 2 thread(s) per chain...
## Chain 2 finished in 166.2 seconds.
## Chain 1 finished in 170.8 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 168.5 seconds.
## Total execution time: 170.8 seconds.
id_plot_rhats(ord_irt_lognormal_abs_est)

id_plot_legis(ord_irt_lognormal_abs_est,show_true = T)

id_sim_coverage(ord_irt_lognormal_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

id_plot_sims(ord_irt_lognormal_abs_est,type='Residuals')

id_plot_sims(ord_irt_lognormal_abs_est)

ord_irt_lognormal_abs_est %>% 
  id_post_pred %>% 
  id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.) + 
  scale_x_log10()
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 12"
## [1] "Predicting outcomes for model ID 12."

ord_irt_lognormal_abs_est %>% 
  id_post_pred(output='missing') %>% 
  id_plot_ppc(ord_irt_lognormal_abs_est,ppc_pred=.)
## [1] "Processing posterior replications for 2000 scores using 100 posterior samples out of a total of 2000 samples."
## [1] "Now on model 12"
## [1] "Predicting outcomes for model ID 12."

Time: Random Walk

We won’t test all the possible time auto-correlation settings, only a subset of them. I use an inflated binary model as it is a very common one.

While these models are identified, it appears that sometimes given the amount of data, they are only weakly identified in the posterior and Rhat values higher than 1.1 are possible (but are not evidence of multi-modality).

First random walks:

bin_time_irt_2pl_abs_sim <- id_sim_gen(num_person=50,num_items=100,
                                       ordinal=F,inflate=T,
                               absence_diff_mean=0,
                               time_process = 'random',
                               time_points=10)

# as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#                                                                   
#                                                                   decreasing=TRUE,
#                                                         index=T)$ix[1])

# as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#                                                                    decreasing=F,
#                                                         index=T)$ix[1])

# 5*sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
#                                                                   
#                                                                   decreasing=TRUE)[1]

best_first_time <- 4 

best_second_time <- 500

bin_time_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_irt_2pl_abs_sim,
                               model_type=2,nchains=2,ncores = 6,
                               id_refresh=100,time_var = 10,
                               niters = 500,warmup = 500,const_type = "items",
                          keep_param=list(person_vary=T,person_int=F,item=TRUE,
                                                           extra=T),
                          restrict_sd_high=10,
                          restrict_sd_low=10,
                               vary_ideal_pts = 'random_walk',
                          restrict_var = F,
                          restrict_ind_high = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_time_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Path [1] :Initial log joint density = -6381.800927 
## Path [1] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes  
##              99      -3.373e+03      6.273e-03   3.098e-02    3.005e-01  1.000e+00      2476 -3.697e+03 -3.697e+03                   
## Path [1] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes  
##             106      -3.373e+03      1.193e-03   1.138e-02    1.000e+00  1.000e+00      2651 -3.552e+03 -3.552e+03                   
## Path [1] :Best Iter: [5] ELBO (-3510.375932) evaluations: (2651) 
## Finished in  1.8 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 3 thread(s) per chain...
## 
## Chain 1 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 2 Iteration:   1 / 1000 [  0%]  (Warmup) 
## Chain 1 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 2 Iteration: 100 / 1000 [ 10%]  (Warmup) 
## Chain 1 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 2 Iteration: 200 / 1000 [ 20%]  (Warmup) 
## Chain 1 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 2 Iteration: 300 / 1000 [ 30%]  (Warmup) 
## Chain 1 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 2 Iteration: 400 / 1000 [ 40%]  (Warmup) 
## Chain 1 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 1 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 2 Iteration: 500 / 1000 [ 50%]  (Warmup) 
## Chain 2 Iteration: 501 / 1000 [ 50%]  (Sampling) 
## Chain 1 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 2 Iteration: 600 / 1000 [ 60%]  (Sampling) 
## Chain 1 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 2 Iteration: 700 / 1000 [ 70%]  (Sampling) 
## Chain 1 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 2 Iteration: 800 / 1000 [ 80%]  (Sampling) 
## Chain 1 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 2 Iteration: 900 / 1000 [ 90%]  (Sampling) 
## Chain 1 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 1 finished in 51.2 seconds.
## Chain 2 Iteration: 1000 / 1000 [100%]  (Sampling) 
## Chain 2 finished in 51.7 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 51.5 seconds.
## Total execution time: 51.8 seconds.
id_plot_rhats(bin_time_irt_2pl_abs_est) + theme(text=element_text(family=""))

id_sim_coverage(bin_time_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let’s do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_irt_2pl_abs_est,plot_sim = T) + facet_wrap(~person_id,scales="free_y")

id_plot_sims(bin_time_irt_2pl_abs_est,type='Residuals')

id_plot_sims(bin_time_irt_2pl_abs_est)

Time: Splines

bin_time_spline_irt_2pl_abs_sim <- id_sim_gen(num_person=100,num_items=100,inflate=F,
                               absence_diff_mean=0,
                               time_process = 'random',
                               time_points=10,time_sd = .25)

sort_ids_high <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix

high_restrict <- c(min(sort_ids_high[1:20]),
                       round(median(sort_ids_high[1:20])),
                   max(sort_ids_high[1:20]))

sort_ids_low <- sort(bin_time_spline_irt_2pl_abs_sim@simul_data$true_reg_discrim,,
                                                                   decreasing=F,
                                                        index=T)$ix

low_restrict <- c(min(sort_ids_low[1:20]),
                       round(median(sort_ids_low[1:20])),
                   max(sort_ids_low[1:20]))

bin_time_spline_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_spline_irt_2pl_abs_sim,
                                           use_groups = F,
                               model_type=1,
                               time_center_cutoff = 50,
                               ncores = parallel::detectCores(),
                               vary_ideal_pts = 'splines',prior_only = FALSE,
                               spline_knots=NULL,spline_degree = 2,
                               const_type = "items",person_sd = 5,
                               diff_reg_sd = 5,
                               discrim_reg_scale = 2,discrim_reg_shape = 2,
                               time_var = .1,restrict_var = F,
                               restrict_ind_high = as.character(high_restrict),
                              restrict_ind_low =as.character(low_restrict),
                               fixtype='prefix',adapt_delta=0.95)
## [1] "Running pathfinder to find starting values"
## Finished in  2.7 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
## 
## Chain 2 finished in 19012.6 seconds.
## Chain 3 finished in 33786.6 seconds.
## Chain 4 finished in 33796.8 seconds.
## Chain 1 finished in 33798.0 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 30098.5 seconds.
## Total execution time: 33798.1 seconds.
id_plot_rhats(bin_time_spline_irt_2pl_abs_est)

id_sim_coverage(bin_time_spline_irt_2pl_abs_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let’s do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_spline_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")

id_plot_sims(bin_time_spline_irt_2pl_abs_est,type='Residuals')

id_plot_sims(bin_time_spline_irt_2pl_abs_est)

Time: AR(1)

And autocorrelation:

bin_time_ar_irt_2pl_abs_sim <- id_sim_gen(num_person=20,num_items=100,inflate=T,
                               absence_diff_mean=0,time_process = 'AR',time_points=20,time_sd = .1)

bin_time_ar_irt_2pl_abs_sim@score_matrix$group_id <- forcats::fct_collapse(bin_time_ar_irt_2pl_abs_sim@score_matrix$person_id,
                                                                           `1`=c("1","2","3"))

bin_time_ar_irt_2pl_abs_est <- id_estimate(idealdata=bin_time_ar_irt_2pl_abs_sim,
                               model_type=2,
                               ncores = parallel::detectCores(),
                               vary_ideal_pts = 'AR1',
                               const_type = "items",
                               restrict_ind_high = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(bin_time_ar_irt_2pl_abs_sim@simul_data$true_reg_discrim,,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  4.0 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 2 thread(s) per chain...
## 
## Chain 4 finished in 123.6 seconds.
## Chain 1 finished in 126.2 seconds.
## Chain 2 finished in 126.8 seconds.
## Chain 3 finished in 127.4 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 126.0 seconds.
## Total execution time: 127.6 seconds.
id_plot_rhats(bin_time_ar_irt_2pl_abs_est)

Let’s do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(bin_time_ar_irt_2pl_abs_est,plot_lines = 10) + guides(colour="none") + facet_wrap(~person_id,scales="free_y")

Time: Gaussian Process

The Gaussian process differs in that is a non-parametric approach to time-series modeling. It can fit very flexible curves and also works on irregularly-spaced time series.

time_gp_sim <- id_sim_gen(num_person=10,num_items=200,inflate=T,
                               absence_diff_mean=0,
                          time_process = 'GP',
                          time_points=20)
## Running MCMC with 1 chain...
## 
## Chain 1 Iteration: 1 / 1 [100%]  (Sampling) 
## Chain 1 finished in 0.0 seconds.
time_gp_est <- id_estimate(idealdata=time_gp_sim,
                               model_type=2,
                           use_vb=F,const_type="items",
                           restrict_ind_high = as.character(sort(time_gp_sim@simul_data$true_reg_discrim,
                                                                     decreasing=TRUE,
                                                        index=TRUE)$ix[1]),
                              restrict_ind_low = as.character(sort(time_gp_sim@simul_data$true_reg_discrim,,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                           nchains=2,ncores=8,warmup=1000,niters=500,
                               vary_ideal_pts = 'GP',
                               fixtype='prefix')
## [1] "Running pathfinder to find starting values"
## Finished in  14.9 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 4 thread(s) per chain...
## Chain 2 finished in 331.6 seconds.
## Chain 1 finished in 336.5 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 334.1 seconds.
## Total execution time: 336.7 seconds.
id_plot_rhats(time_gp_est)

id_sim_coverage(time_gp_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Let’s do a time-varying ideal point plot with true point overlay:

id_plot_legis_dyn(time_gp_est,plot_sim = T) +facet_wrap(~person_id)

id_plot_sims(time_gp_est,type='Residuals')

id_plot_sims(time_gp_est)

Latent Space Model

Another new addition is the latent space model, which is only a slight modification on the IRT model.

Non-inflated:

latent_noninfl_sim <- id_sim_gen(num_person=10,num_items=125,ordinal=F,
                                       inflate=F,
                                       model_type='binary',
                                       latent_space=T)
latent_noninfl_est <- id_estimate(idealdata=latent_noninfl_sim,
                               model_type=13,
                               restrict_ind_high = as.character(sort(latent_noninfl_sim@simul_data$true_person,
                                                                     decreasing=TRUE,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(latent_noninfl_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix',id_refresh=100,warmup=250,niters=250,nchains=2,ncores=2)
## [1] "Running pathfinder to find starting values"
## Path [1] :Initial log joint density = -1128.305893 
## Path [1] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes  
##              47      -6.693e+02      1.053e-03   9.242e-01    1.000e-03  1.000e-03      1176 -1.929e+03 -1.929e+03LS failed, Hessian reset 
## Path [1] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes  
##              99      -6.690e+02      2.558e-02   7.742e-01    1.891e+00  9.218e-01      2476 -1.487e+03 -1.487e+03                   
## Path [1] : Iter      log prob        ||dx||      ||grad||     alpha      alpha0      # evals       ELBO    Best ELBO        Notes  
##             181      -6.690e+02      1.081e-06   5.788e-01    8.456e-07  1.000e-03      4526 -2.735e+03 -2.735e+03LS failed, Hessian reset 
## Path [1] :Best Iter: [24] ELBO (-787.536684) evaluations: (4526) 
## Finished in  1.3 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 2 parallel chains, with 1 thread(s) per chain...
## 
## Chain 1 Iteration:   1 / 500 [  0%]  (Warmup) 
## Chain 2 Iteration:   1 / 500 [  0%]  (Warmup) 
## Chain 1 Iteration: 100 / 500 [ 20%]  (Warmup) 
## Chain 2 Iteration: 100 / 500 [ 20%]  (Warmup) 
## Chain 2 Iteration: 200 / 500 [ 40%]  (Warmup) 
## Chain 1 Iteration: 200 / 500 [ 40%]  (Warmup) 
## Chain 2 Iteration: 251 / 500 [ 50%]  (Sampling) 
## Chain 1 Iteration: 251 / 500 [ 50%]  (Sampling) 
## Chain 2 Iteration: 350 / 500 [ 70%]  (Sampling) 
## Chain 1 Iteration: 350 / 500 [ 70%]  (Sampling) 
## Chain 2 Iteration: 450 / 500 [ 90%]  (Sampling) 
## Chain 1 Iteration: 450 / 500 [ 90%]  (Sampling) 
## Chain 2 Iteration: 500 / 500 [100%]  (Sampling) 
## Chain 2 finished in 37.2 seconds.
## Chain 1 Iteration: 500 / 500 [100%]  (Sampling) 
## Chain 1 finished in 38.6 seconds.
## 
## Both chains finished successfully.
## Mean chain execution time: 37.9 seconds.
## Total execution time: 38.6 seconds.
id_plot_rhats(latent_noninfl_est)

id_plot_legis(latent_noninfl_est,show_true = T)

id_sim_coverage(latent_noninfl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal() 

Inflated:

# you can't have too few people relative to bills
latent_infl_sim <- id_sim_gen(num_person=20,num_items=125,ordinal=F,
                                       inflate=T,
                                       model_type='binary',
                                       latent_space=T)
latent_infl_est <- id_estimate(idealdata=latent_infl_sim,
                               model_type=14,
                               restrict_ind_high = as.character(sort(latent_infl_sim@simul_data$true_person,
                                                                     decreasing=TRUE,
                                                        index=T)$ix[1]),
                              restrict_ind_low = as.character(sort(latent_infl_sim@simul_data$true_person,
                                                                   decreasing=F,
                                                        index=T)$ix[1]),
                               fixtype='prefix',
                              adapt_delta=0.95,
                              diff_reg_sd = 2,
                             diff_miss_sd=2)
## [1] "Running pathfinder to find starting values"
## Finished in  1.5 seconds.
## [1] "Estimating model with full Stan MCMC sampler."
## Running MCMC with 4 parallel chains, with 1 thread(s) per chain...
## 
## Chain 1 finished in 1110.1 seconds.
## Chain 4 finished in 1117.9 seconds.
## Chain 3 finished in 1196.5 seconds.
## Chain 2 finished in 1208.1 seconds.
## 
## All 4 chains finished successfully.
## Mean chain execution time: 1158.2 seconds.
## Total execution time: 1208.2 seconds.
id_plot_rhats(latent_infl_est)

id_plot_legis(latent_infl_est,show_true = T)

id_sim_coverage(latent_infl_est) %>% 
  bind_rows(.id='Parameter') %>% 
  ggplot(aes(y=avg,x=Parameter)) +
  stat_summary(fun.args=list(mult=1.96)) + 
  theme_minimal()